
* Project root used by every file reference below. Exactly one global path
* line should be active; the commented-out lines are alternative roots
* (presumably for other machines).
*global path = "C:/Users/KShih/Dropbox/Work/Research Projects/Active/chinese_students_2017/"
*global path = "C:/Users/Kevin Shih/Dropbox/Work/Research Projects/Active/chinese_students_2017/"
global path = "C:\Users\creto\Desktop\chinese_students_2017\"
 
 
//OPEN LOG FILE
* Logging is currently disabled; uncomment both lines to write a log file.
*cap log close
*log using "$path/Logfiles/analysis_expweighted_08feb2021.log", replace


* Start from an empty dataset, turn off output paging, and raise matsize.
clear
set more off
set matsize 10000



/****************************************************************************/

****************       STEP 1: LOAD DATA     **************

/****************************************************************************/


****** NEW DATA FILE FOR REPLICATION CREATED BY KEVIN *******

* Load the replication dataset from under the project root, replacing
* whatever is in memory.
use "$path/Submissions/ReStat/Replication Files 2022/kswxy_data.dta", clear
	
			*---------------------------------------------------------------------------*			
/****************************************************************************/

*************   SUMMARY STATS TABLE ***************

******************************************************************************

*----------------------------------------------------------------------------*
	* Discard observations that carry no city identifier.
	drop if citycode==.

	* City-level indicator named sample: a row is flagged 1 when balanced==1
	* and popwt03 is non-missing; the by-city minimum then copies that flag to
	* every row of the city (egen min() skips missing values, so a single
	* flagged row is enough to mark the whole city).
	tempvar rowflag
	gen `rowflag' = 1 if balanced==1 & popwt03!=.
	egen sample = min(`rowflag'), by(citycode)
	drop `rowflag'

	* Rescaled levels. NOTE(review): the scale factors are taken as given;
	* confirm the source units against the data documentation.
	gen exports = dvalue/10000
	gen gdppc_true = gdppc*10

	* Composite student counts: the other category sums four raw codes, and
	* non-funded students are total minus university-funded.
	gen stud_ot = stud_1 + stud_2 + stud_7 + stud_8
	gen stud_nonfunded = stud_tot - stud_univ_funded

	gen gdp_true = gdp*10000
	gen ln_gdp_true = ln(gdp_true)

	* Share of each student category in total students.
	local categories as ba ma dr ot stem ss arts bus ///
		hi_admrate_p0_25 hi_admrate_p25_50 hi_admrate_p50_75 hi_admrate_p75_100 ///
		univ_funded nonfunded
	foreach cat of local categories {
		gen sh_`cat' = stud_`cat'/stud_tot
	}
	
	
	//CALCULATE SUMMARY STATS HERE (ADD NEW VARIABLES TO THE FOREACH LOOP)
	* For every listed variable, store mean, sd and N for 2000 (suffix _0) and
	* 2013 (suffix _1) on the sample==1 cities, and attach the number of
	* distinct cities as the scalar e(cities).
	* The city count is taken from the SAME year as the statistics it is
	* attached to. The 2000 column previously counted cities in 2004, which
	* would disagree with the reported statistics whenever the set of sample
	* cities observed differs between 2000 and 2004.
	* Requires the user-written commands estpost/eststo/estadd (estout) and
	* distinct.
	foreach var of varlist stud_tot sh_as sh_ba sh_ma sh_dr sh_ot sh_stem sh_ss sh_arts ///
						   sh_hi_admrate_p0_25 sh_hi_admrate_p25_50 sh_hi_admrate_p50_75 sh_hi_admrate_p75_100 ///
						   sh_univ_funded sh_nonfunded ln_dvalue pop ln_gdp_true gdp exports gdppc_true {

			* col is the stored-estimate suffix: 0 for 2000, 1 for 2013.
			local col = 0
			foreach yr of numlist 2000 2013 {
				eststo `var'_`col': estpost tabstat `var' if year==`yr' & sample==1, statistics(mean sd N) columns(statistics) listwise
				distinct citycode if year==`yr' & sample==1
				estadd scalar cities=r(ndistinct)
				local ++col
			}
		}	
	
/*		
//TABLE BEGINS HERE (NOTE: THE FIRST ESTTAB CANNOT HAVE prehead(""). THE LAST ESTTAB CANNOT HAVE postfoot(""). ALL THE ONES IN BETWEEN MUST HAVE postfoot("").)
	
	*pop gdp exports total students
	esttab pop_0 pop_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", replace unstack booktabs ///
	 main(mean %12.0fc) aux(sd %12.0fc) ///
	nodepvars noobs nonotes nostar mtitle("2000" "2013") prefoot("") postfoot("") collabels(none) ///
	varlabels(pop "Population (in 000s)") 
	
	
	esttab gdp_0 gdp_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", append unstack booktabs ///
	 main(mean %12.0fc) aux(sd %12.0fc) ///
	nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("") postfoot("") collabels(none) ///
	varlabels(gdp "GDP (in 10,000 RMB)")
	
	esttab gdppc_true_0 gdppc_true_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", append unstack booktabs ///
	 main(mean %12.0fc) aux(sd %12.0fc) ///
	nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("") postfoot("") collabels(none) ///
	varlabels(gdppc_true "GDP per capita (in RMB)")
	
	esttab exports_0 exports_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", append unstack booktabs ///
	 main(mean %12.0fc) aux(sd %12.0fc) ///
	nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("") postfoot("") collabels(none) ///
	varlabels(exports "Exports (in 10,000 RMB)")
	
	esttab stud_tot_0 stud_tot_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", append unstack booktabs ///
	 main(mean %12.0fc) aux(sd %12.0fc) ///
	nodepvars noobs nonotes nostar nonumbers nomtitles prehead("\\") posthead("") prefoot("") postfoot("") collabels(none) ///
	varlabels(stud_tot "Students Entering\\US Higher Ed\\Per 1M City Residents") 
	
	*academic level
	esttab sh_as_0 sh_as_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("\textit{\underline{Academic Level:}} \\") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_as "Associates") append

	esttab sh_ba_0 sh_ba_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_ba "Bachelors") append

	esttab sh_ma_0 sh_ma_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_ma "Masters") append

	esttab sh_dr_0 sh_dr_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_dr "Doctorate") append
	
	esttab sh_ot_0 sh_ot_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_ot "Other") append 


	*field of study 
	esttab sh_stem_0 sh_stem_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("\textit{\underline{Field of Study:}} \\") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_stem STEM) append
		
	esttab sh_ss_0 sh_ss_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack  label ///
	 main(mean %12.2fc) aux(sd %12.2fc) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") postfoot("") prefoot("")  ///
	 varlabels(sh_ss "Social Science") append 
	 
	esttab sh_arts_0 sh_arts_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("") postfoot("")  ///
	 varlabels(sh_arts "Arts/Humanities") append
	 
	*admission rate
	
	esttab sh_hi_admrate_p0_25_0 sh_hi_admrate_p0_25_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f)  ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("\textit{\underline{University Admissions Rate:}} \\") posthead("") prefoot("")  postfoot("")  ///
	 varlabels(sh_hi_admrate_p0_25 "Tier 1 - 1st Quartile") append
	
	esttab sh_hi_admrate_p25_50_0 sh_hi_admrate_p25_50_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("")  postfoot("")  ///
	 varlabels(sh_hi_admrate_p25_50 "Tier 2 - 2nd Quartile") append
	
	esttab sh_hi_admrate_p50_75_0 sh_hi_admrate_p50_75_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("")  postfoot("")  ///
	 varlabels(sh_hi_admrate_p50_75 "Tier 3 - 3rd Quartile") append
	
	esttab sh_hi_admrate_p75_100_0 sh_hi_admrate_p75_100_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("")  postfoot("")  ///
	 varlabels(sh_hi_admrate_p75_100 "Tier 4 - 4th Quartile") append
	
	*funding
	esttab sh_univ_funded_0 sh_univ_funded_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("\textit{\underline{Scholarship Funding:}} \\") posthead("") prefoot("")  postfoot("")  ///
	 varlabels(sh_univ_funded "Received Funding") append
	
	esttab sh_nonfunded_0 sh_nonfunded_1 using "$path/Tables/revisions2022/table_summary_stats_12may2022.tex", unstack ///
	 main(mean %12.2f) aux(sd %12.2f) stats(cities, fmt(%12.0f) labels("Number of Cities")) ///
	 nodepvars noobs nonotes nostar nonumbers nomtitles prehead("") posthead("") prefoot("\hline")  collabels(none) ///
	 varlabels(sh_nonfunded "No Funding") append
	
	
*/
				
	*---------------------------------------------------------------------------*			
/****************************************************************************/

*************   REGRESSION ANALYSIS   ***************

******************************************************************************
*----------------------------------------------------------------------------*

//SET CONTROLS:
* Baseline control variables appended to the cross-sectional regressions.
global controls "contract_cons_exw97 chinatariff00_expw97 inputtariff02_expw97 share_revenue_exw97" 


**DROP CITY MISSING CODE
* Repeated here so this section can also be run on its own; it is a no-op
* when the rows were already dropped earlier.
drop if citycode==.



		* Cut iv_ntr into 40 groups and attach bin-level aggregates.
		egen iv_bin = cut(iv_ntr), group(40)
		egen mean_iv_ntr=mean(iv_ntr), by(iv_bin)
		egen total_pop=total(pop), by(iv_bin)
		
		* Proportional change in dvalue between the 2013 row and the row 13
		* positions earlier within the city. The (year) in the by-prefix forces
		* chronological order inside each city: the egen calls above may leave
		* the data ordered by iv_bin, and a plain by-city sort does not
		* guarantee any particular order of the rows within a city, which would
		* make the lag subscript point at an arbitrary year.
		* NOTE(review): the lag of 13 reaches the year 2000 only if every city
		* has one row per consecutive year; confirm for the estimation sample.
		bys citycode (year): gen pct_dvalue_0013 = (dvalue - dvalue[_n-13])/dvalue[_n-13] if year==2013
		
			**** Shenzhen and Beijing are outlier cities
		* Copies of the growth outcomes that are missing for city codes 4403
		* and 1100, so those two cities drop out of anything using them.
		gen no_outlier_tot_0413=shc_stud_tot_0413 if (citycode!=4403 & citycode!=1100) 
		gen no_outlier_tot_0213=shc_stud_tot_0213 if (citycode!=4403 & citycode!=1100) 
		gen no_outlier_tot_0013=shc_stud_tot_0013 if (citycode!=4403 & citycode!=1100) 
		gen no_outlier_tot_0001 = shc_stud_tot_0001_pop04 if  (citycode!=4403 & citycode!=1100) 
		gen lnc_numstd_midschool2_9700 = lnc_numstd_midschool_9700 if  (citycode!=4403 & citycode!=1100) 
		gen lnc_num_midschool2_9700 = lnc_num_midschool_9700 if  (citycode!=4403 & citycode!=1100) 
					  
				
		* Variable labels used by the figures and tables below.

		* Student-growth outcomes.
		label variable shc_stud_tot_0213       "Growth in Students, 2002-2013"
		label variable shc_stud_tot_0013       "Growth in Students, 2000-2013"
		label variable no_outlier_tot_0213     "Growth in Students, 2002-13 (no outliers)"
		label variable no_outlier_tot_0013     "Growth in Students, 2000-13 (no outliers)"
		label variable no_outlier_tot_0001     "Growth in Students, 2000-1  (no outliers)"
		label variable shc_stud_tot_0001_pop04 "Students 2000-01 per 1000"

		* Pre-period (1997-2000) city trends.
		label variable lnc_gdp_9700               "Log Change in GDP, 1997-2000"
		label variable lnc_emp_9700               "Log Change in Employment, 1997-2000"
		label variable lnc_inv_fdi_9700           "Log Change in FDI, 1997-2000"
		label variable lnc_inv_realest_9700       "Log Change in RE Investment, 1997-2000"
		label variable lnc_dvalue_9700            "Exports, 1997-2000"
		label variable lnc_numstd_college_9700    "College Students, 1997-2000"
		label variable lnc_num_college_9700       "Number of Colleges, 1997-2000"
		label variable lnc_numstd_midschool2_9700 "Middle School Students, 1997-2000"
		label variable lnc_num_midschool2_9700    "Number of Middle Schools, 1997-2000"

		* Export growth over the main period.
		label variable lnc_dvalue_0013            "Exports, 2000-2013"

	
	**** FIGURE 4 *********
	******* THIS IS NOW FIGURE 3 ACTUALLY****************

	* For each outcome: store its mean within every iv_bin, then regress the
	* outcome on iv_ntr in the 2013 balanced cross-section with standard
	* errors clustered by city, and keep the slope, t-statistic and two-sided
	* p-value (raw and formatted to three decimals) in locals.
	foreach outcome in lnc_dvalue_0013 no_outlier_tot_0013 shc_stud_tot_0013 no_outlier_tot_0001 {

		egen mean_`outcome' = mean(`outcome'), by(iv_bin)

		**** MZ ADD
		reg `outcome' iv_ntr if year==2013 & balanced==1, cluster(citycode)

		* slope and t-statistic
		local b = _b[iv_ntr]
		local t = _b[iv_ntr]/_se[iv_ntr]

		* formatted copies, plus the p-value from the t distribution with
		* the residual degrees of freedom
		local bf : display %12.3f `b'
		local tf : display %12.3f `t'
		local p  : display %12.3f 2*ttail(e(df_r),abs(`t'))
	}
	
	
		**** FIGURE 5 ********* (NOW FIG 4)

	***3** EVENT STUDY.
		sort citycode year

			**CONSTRUCT YEAR ON YEAR CHANGES, STANDARDIZED BY 2000 POP
			* For each study level, build the twelve one-year changes named
			* with suffixes 0102 through 1213. Step k is the change from year
			* 2000+k to 2000+k+1, read from the 2013 row by lagging 13-k and
			* 12-k positions, and divided by pop 13 positions back.
			foreach lvl in as ba ma dr tot {

				forvalues k = 1/12 {
					* two-digit year tags for the variable name
					local from : display %02.0f `k'
					local to   : display %02.0f `k'+1

					* row offsets of the later and earlier year
					local lag_to   = 12 - `k'
					local lag_from = 13 - `k'

					bys citycode: gen shc_stud_`lvl'_`from'`to' = (stud_`lvl'[_n-`lag_to'] - stud_`lvl'[_n-`lag_from']) / (pop[_n-13]) if year==2013
				}

			}

				**EVENT STUDY ANALYSIS
				* One cross-sectional regression per one-year change. The slope
				* on iv_ntr and its 1.96-standard-error band are written into
				* b, ciu95 and cil95 on the rows of the matching calendar year,
				* starting at 2000 for the 0001 change.
				gen b = .
				gen ciu95 = .
				gen cil95 = .

				local y = 2000
				foreach t in 0001 0102 0203 0304 0405 0506 0607 0708 0809 0910 1011 1112 1213 {

					eststo N_`t': reg shc_stud_tot_`t' iv_ntr if year==2013 & balanced==1, cluster(citycode)

					replace b     = _b[iv_ntr]                     if year == `y'
					replace ciu95 = _b[iv_ntr] + 1.96*_se[iv_ntr] if year == `y'
					replace cil95 = _b[iv_ntr] - 1.96*_se[iv_ntr] if year == `y'

					local ++y
				}
		 
				
				* Average annual version of the 2002-13 growth outcomes (the
				* total change divided by 11) and its mean within each iv_bin.
				foreach outcome in no_outlier_tot_0213 shc_stud_tot_0213 {
					gen `outcome'_yoy = `outcome'/11
					egen mean_`outcome'_yoy = mean(`outcome'_yoy), by(iv_bin)
				}

				* Interquartile range of iv_ntr in the 2013 balanced cross-section.
				summarize iv_ntr if year==2013 & balanced==1, detail
				local iqr_ivntr = r(p75) - r(p25)


	**** EXTRA ROBUSTNESS FOR REFEREE: ADD THE SKILL-SHARES CONTROL (RE-EXPRESSED AT CITY LEVEL)

				summarize iv_ntr if year==2013 & balanced==1, detail
				local iqr_ivntr = r(p75) - r(p25)
				label variable skillsh_exw97 "Skill Intensity"


	/* BARTIK ROBUSTNESS ON MAIN ANALYSIS */
				* Same interquartile range, plus the one for the
				* employment-weighted gap measure.
				summarize iv_ntr if year==2013 & balanced==1, detail
				local iqr_ivntr = r(p75) - r(p25)

				summarize ntrgap_empwtd90_isic if year==2013 & balanced==1, detail
				local iqr_ivntremp90 = r(p75) - r(p25)
			
			
			
		
		* Shorten the names of the quartile-split student counts. Each pair
		* is the original stem followed by its abbreviation, and the quartile
		* tag loses its underscore (p0_25 becomes p025, and so on).
		foreach pair in "hi_admrate ar" "hi_instrspend ie" "funds_p fp" "funds_po fpo" {

			local oldstem : word 1 of `pair'
			local newstem : word 2 of `pair'

			foreach q in p0_25 p25_50 p50_75 p75_100 {
				local qshort = subinstr("`q'", "_", "", .)
				rename stud_`oldstem'_`q' stud_`newstem'_`qshort'
			}
		}

*construct vars to make the factor to calculate elasticities
	* Undergraduate students are associates plus bachelors. This matches the
	* definition used for the growth outcome further down, where
	* shc_stud_ug_0213 is the sum of the as and ba changes and the masters
	* change is subtracted separately. The previous definition (ba + ma)
	* mixed masters students into the undergraduate base.
	gen stud_ug = stud_as + stud_ba

	* Order rows chronologically within city once, before the loop; the
	* (year) in the by-prefix below keeps the lag subscript tied to that
	* order even if an earlier command left the data sorted differently.
	sort citycode year

	* For every category, the count 11 rows before the 2013 row divided by
	* pop from the same earlier row, stored on the 2013 row only.
	* NOTE(review): the lag of 11 reaches 2002 only if each city has one row
	* per consecutive year; confirm for the estimation sample.
	foreach var in 		   tot ug ma dr ot as ba				/// Level of study
					       stem ss bus arts 					/// field of study
						   ar_p025 ar_p2550 ar_p5075 ar_p75100 	/// admission rate
						   nonfunded univ_funded 				/// funded vs non funded students
						   fpo_p025 fpo_p2550 fpo_p5075 fpo_p75100 ///level of funding 
						   hhc_p025 hhc_p2550 hhc_p5075 hhc_p75100 ///human capital level, inequality
						   {
							bys citycode (year): gen sh_stud_`var'02 = stud_`var'[_n-11]/pop[_n-11] if year==2013
		}
		

		
		* Growth measures between the 2013 row and the row 11 positions
		* earlier, by student category. Three variants per category:
		*   shc_   change in the count divided by the earlier pop
		*   ch_    change in the count divided by the change in the reference total
		*   chsh_  change in the category share of the reference total
		sort citycode year

		local splits ar_p025 ar_p2550 ar_p5075 ar_p75100 ie_p025 ie_p2550 ie_p5075 ie_p75100 bus stem ss arts univ_funded ///
			fp_p025 fp_p2550 fp_p5075 fp_p75100 fpo_p025 fpo_p2550 fpo_p5075 fpo_p75100 ///
			hhc_p50100 hhc_p050 hhc_p75100 hhc_p025 hhc_p2550 hhc_p5075

		foreach cat of local splits {
			bysort citycode (year): gen shc_stud_`cat'_0213 = (stud_`cat' - stud_`cat'[_n-11]) / (pop[_n-11]) if year==2013
			bysort citycode (year): gen ch_stud_`cat'_tot_0213 = (stud_`cat' - stud_`cat'[_n-11]) / (stud_tot - stud_tot[_n-11]) if year==2013
			bysort citycode (year): gen chsh_stud_`cat'_tot_0213 = (stud_`cat'/stud_tot) - (stud_`cat'[_n-11]/stud_tot[_n-11]) if year==2013
		}

		* Non-funded growth is total growth minus university-funded growth.
		gen shc_stud_nonfunded_0213 = shc_stud_tot_0213 - shc_stud_univ_funded_0213 if year==2013 & balanced==1

		* Same three variants for each field or funding category within each
		* study level; here the reference total is the level count.
		foreach cat in bus stem ss arts univ_funded {
			foreach lvl in as ba ma dr ot {
				bysort citycode (year): gen shc_stud_`cat'_`lvl'_0213 = (stud_`cat'_`lvl' - stud_`cat'_`lvl'[_n-11]) / (pop[_n-11]) if year==2013
				bysort citycode (year): gen ch_stud_`cat'_`lvl'_0213 = (stud_`cat'_`lvl' - stud_`cat'_`lvl'[_n-11]) / (stud_`lvl' - stud_`lvl'[_n-11]) if year==2013
				bysort citycode (year): gen chsh_stud_`cat'_`lvl'_0213 = (stud_`cat'_`lvl'/stud_`lvl') - (stud_`cat'_`lvl'[_n-11]/stud_`lvl'[_n-11]) if year==2013
			}
		}

		* Undergraduate growth (associates plus bachelors) and the residual
		* other category (total minus undergraduate, masters and doctorate).
		gen shc_stud_ug_0213 = shc_stud_as_0213 + shc_stud_ba_0213
		gen shc_stud_ot_0213 = shc_stud_tot_0213 - shc_stud_ug_0213 - shc_stud_ma_0213 - shc_stud_dr_0213
		
	
	
			**BY LEVEL OF STUDY	
		
			    * Store the 2002 balanced-sample mean of each level count in a
			    * local named after the level (as, ba, ma, dr, ot, tot).
			    foreach l in as ba ma dr ot tot {
				sum stud_`l' if year==2002 & balanced==1
				local `l' = r(mean)
				}
				
			
		* Main specification: 2002-13 growth in total students on iv_ntr plus
		* the baseline controls, 2013 balanced cross-section, clustered by city.
		eststo TOT: reg shc_stud_tot_0213 iv_ntr $controls if year==2013 & balanced==1 , cluster(citycode) 
				local beta_main = _b[iv_ntr]
				
				* Elasticity attached to the stored estimate: the slope times
				* 1 over the mean of sh_stud_tot02 times 1/10, rounded to two
				* decimals. The summarize has no if-condition, so r(mean) is
				* taken over every non-missing value of sh_stud_tot02.
				sum sh_stud_tot02
				estadd local elast = round((_b[iv_ntr]*(1/`r(mean)')*(1/10)),.01)
				
								
				
				
			
		
		
		
		* 2002 balanced-sample means by field of study. The local tot is
		* recomputed with the same value as above.
		foreach l in stem ss bus arts tot {
		sum stud_`l' if year==2002 & balanced==1
		local `l' = r(mean)
		}
		
		
		
		
	
	
		* 2002 balanced-sample means by admission-rate quartile.
		foreach l in ar_p025 ar_p2550 ar_p5075 ar_p75100 tot {
		sum stud_`l' if year==2002 & balanced==1
		local `l' = r(mean)
		}
		
		 
		
		* 2002 balanced-sample means by funding quartile.
		* NOTE(review): none of these mean locals is referenced in the code
		* visible in this file; they may be leftovers from removed tables.
		foreach l in fpo_p025 fpo_p2550 fpo_p5075 fpo_p75100 tot {
		sum stud_`l' if year==2002 & balanced==1
		local `l' = r(mean)
		}
		
		
		
		
		
		
	***TABLE 8 ***********

	**NEW UHS ANALYSIS RENTS, INCOME PERCENTILES, N HOUSES ETC
		* Attach the combined UHS file by chinacity. Rows found only in the
		* UHS file are discarded and no merge indicator is kept.
		*merge m:1 chinacity using "$path/Data/1.ChinaData/UHS/UHS_d_0207"
		merge m:1 chinacity using "$path/Data/1.ChinaData/UHS/UHS_d_0207_combined", keep(master match) nogenerate


		*convert gdp from in 10,000s to in billions
		gen gdpb = gdp/100000
		summarize gdpb if year==2002 & balanced==1
		local ymean : display %12.2f r(mean)

		*convert pop from in 10,000s to in millions
		gen oldpopb = oldpop/100
		summarize oldpopb if year==2002 & balanced==1

		* Descriptive means for reference.
		summarize gdppc if year==2002 & balanced==1

		summarize sh_afford_tuition2002 if year==2013 & balanced==1
		
	
						
		
		
		
		/* NEW UHS ANALYSIS, BINNED SCATTER PLOTS
		OCT 19, 2021
		*/
	*** Label the main variables

* Exports and pre-period schooling trends.
label variable lnc_dvalue_0013           "Exports, 2000-2013"
label variable lnc_dvalue_9700           "Exports, 1997-2000" 
label variable lnc_numstd_college_9700   "College Students, 1997-2000"
label variable lnc_num_college_9700      "Number of Colleges, 1997-2000"
label variable lnc_numstd_midschool_9700 "Middle School Students, 1997-2000"
label variable lnc_num_midschool_9700    "Number of Middle Schools, 1997-2000"

* Real estate, income and expenditure outcomes.
label variable lnc_ppsq_comm_0213   "RE Price Growth (Commercial)" 
label variable d_ln_hprice          "RE Price Growth (Residential)" 
label variable lnc_ppsq_res_0513    "RE Price Growth (Residential)" 
label variable lnc_liqdfy_0208      "Income Growth from RE Sales"
label variable d_lnhvalue_avg       "Property Value Appreciation"
label variable d_ln_inc_bus_avg     "Income Growth from Own Business"
label variable d_totservice_share   "Growth in Service Expenditure Shares"
label variable d_borrowing_exp      "Growth in the Share of Borrowing to Income"
label variable d_sh_collect_rent    "{&Delta} Share of Rents in Family Income"
label variable d_sh_afford_tuition  "{&Delta} Share of Households can Afford Tuition"
label variable d_ln_inc_housing_avg "{&Delta} Real Estate Income"
label variable d_ln_hrent_value_avg "{&Delta} Rental Income"
label variable lnc_gdppc_0213       "{&Delta} GDP per Capita"

* Student-growth outcomes.
label variable shc_stud_tot_0213 "Growth in Students, 2002-2013"
label variable shc_stud_ba_0213  "Growth in Bachelors Students, 2002-2013"
label variable shc_stud_ma_0213  "Growth in Masters Students, 2002-2013"
label variable shc_stud_dr_0213  "Growth in PhD Students, 2002-2013"
*label var no_outlier_tot_0213 "Growth in Students, 2002-13 (no outliers)"

* City aggregates.
label variable lnc_gdp_0213    "Change in Log(GDP), 2002-2013"
label variable lnc_oldpop_0213 "Change in Log(Population), 2002-2013"


	
	
	**MECHANISMS RETURNS TO EDUCATION: TABLE 7
		
		
		*ariel's ntr by skill
		* City-level file merged onto the panel by citycode; rows that exist
		* only in the using file are dropped.
		merge m:1 citycode using "$path/Data/6.AnalysisData/IV1_skill/gapsexpweighted97_skill"
		drop if _merge==2
		drop _merge
		
		*mingzhi's ntr by skill
		* Build a one-row-per-city file of skill-specific gaps without
		* disturbing the data in memory: the panel is preserved, the long file
		* is reshaped wide twice (first over skmatrix, then over type), the
		* result is saved to a temporary file, and the panel is restored.
		preserve
			use "$path/Data/6.AnalysisData/Shocks_by_Skill/gapsexp97weighted_XObySk", clear
			keep ntrgap_expweighted citycode type skmatrix
			rename ntrgap_expweighted ntrgap_exw97
			* First reshape: one column per skmatrix value; the renames that
			* follow show the values are China and Indonesia.
			reshape wide ntrgap_exw97, i(citycode type) j(skmatrix,str)
			rename ntrgap_exw97China ntrgap_exw97_chn 
			rename ntrgap_exw97Indonesia ntrgap_exw97_ind
			* Second reshape: one column per type value; the renames that
			* follow show the values are Skill and Unskill.
			reshape wide ntrgap_exw97_chn ntrgap_exw97_ind , i(citycode) j(type,str)
			rename ntrgap_exw97_chnSkill ntrgap_exw97_chn_sk
			rename ntrgap_exw97_indSkill ntrgap_exw97_ind_sk
			rename ntrgap_exw97_chnUnskill ntrgap_exw97_chn_un
			rename ntrgap_exw97_indUnskill ntrgap_exw97_ind_un
			compress
			tempfile ntrskill
			save "`ntrskill'"
		restore

		* Attach the reshaped gaps to the panel, again keeping no rows that
		* exist only in the using file.
		merge m:1 citycode using "`ntrskill'"
		drop if _merge==2
		drop _merge
		
		
	 

	* Attach city-year exports by skill group. Unlike the other merges in
	* this file, rows found only in the using file are kept.
	* NOTE(review): confirm that keeping using-only rows is intended.
	merge 1:1 citycode year using "$path/Data/1.ChinaData/Trade/CHNExport_byskill", nogenerate


		* Log changes of each export series measured on the 2013 row: against
		* the row 9 positions earlier (suffix 0413) and 13 positions earlier
		* (suffix 0013) within the city.
		sort citycode year

		local skill_exports dv_sk_CHN04_med dv_unsk_CHN04_med dv_sk_CHN04_iqr4 dv_sk_CHN04_iqr3 dv_sk_CHN04_iqr2 ///
			dv_sk_CHN04_iqr1 dv_sk_IND96_med dv_unsk_IND96_med dv_sk_IND96_iqr4 dv_sk_IND96_iqr3 ///
			dv_sk_IND96_iqr2 dv_sk_IND96_iqr1 dv_isicmiss
		// Any other variables??

		foreach series of local skill_exports {
			bysort citycode (year): gen lnc_`series'_0413 = (ln(`series') - ln(`series'[_n-9])) if year==2013
			bysort citycode (year): gen lnc_`series'_0013 = (ln(`series') - ln(`series'[_n-13])) if year==2013
		}
	
	

		
				
					
				
				
				/* REGRESSION TABLES */
				
				
				* Treat a missing skilled or unskilled median-split gap
				* (CHN04 classification) as zero.
				foreach grp in sk unsk {
					replace ntrgap_expw97_`grp'_CHN04_med = 0 if ntrgap_expw97_`grp'_CHN04_med==.
				}

				* Label the skill-specific gaps for both classifications. The
				* three-letter tag shown in the label is the leading part of
				* the classification code (CHN04 gives CHN, IND96 gives IND).
				foreach cls in CHN04 IND96 {

					local tag = substr("`cls'", 1, 3)

					label variable ntrgap_expw97_sk_`cls'_med   "Skilled NTR `tag'"
					label variable ntrgap_expw97_unsk_`cls'_med "Unskilled NTR `tag'"

					forvalues q = 1/4 {
						label variable ntrgap_expw97_sk_`cls'_iqr`q' "Skilled NTR Q`q' `tag'"
					}
				}
				
				
						
				
				
	
				sort citycode year
				*construct network in 2000 as size of student population from each city in 2000
				* network2000 equals stud_tot on the year-2000 row and zero on
				* every other row, so the by-city maximum spreads the 2000
				* value to all rows of the city.
			gen network2000= stud_tot*(year==2000)
			egen stud_tot2000 = max(network2000), by(citycode)
			drop network2000
			
			*interact with ntr instrument
			gen iv_ntr_x_stud_tot2000 = iv_ntr*stud_tot2000
			
			* Sum of stud_tot over the rows 13, 12, 11 and 10 positions before
			* the 2013 row. The (year) in the by-prefix forces chronological
			* order inside each city: the egen call above runs between the
			* sort and this line and may change the order of rows within a
			* city, which would make the lag subscripts point at arbitrary
			* years.
			bys citycode (year): gen stud_tot_2000_2003 = stud_tot[_n-13] + stud_tot[_n-12] +  stud_tot[_n-11] +  stud_tot[_n-10] if year==2013
				gen iv_ntr_x_stud_tot_2000_2003 = iv_ntr * stud_tot_2000_2003
				
				* Interquartile range of iv_ntr in the 2013 balanced
				* cross-section, used to scale the effects reported below.
				* It was previously computed twice in a row with identical
				* results; once is enough.
				sum iv_ntr if year==2013 & balanced==1, de
				local iqr_ivntr = r(p75) - r(p25)
				
* Study-abroad agency regressions: 2002-13 growth in total students on
* iv_ntr, an agency-density measure and (in G and GG) its interaction with
* iv_ntr, plus the baseline controls; 2013 balanced cross-section with
* standard errors clustered by city. G and H use the percap density, GG and
* HH the percollstd density.
eststo G: reg shc_stud_tot_0213 iv_ntr totagenc_percap ivagenc_percap $controls if year==2013 & balanced==1 , cluster(citycode)
eststo GG: reg shc_stud_tot_0213 iv_ntr totagenc_percollstd ivagenc_percollstd $controls if year==2013 & balanced==1 , cluster(citycode)

* Versions without the interaction. Each stores the dependent-variable
* summary and the interquartile effect: the iv_ntr slope times the
* interquartile range of iv_ntr, times 1000, rounded to the nearest integer.
eststo H: reg shc_stud_tot_0213 iv_ntr totagenc_percap $controls if year==2013 & balanced==1 , cluster(citycode)
				estadd ysumm
				estadd local iqreffect = round(`iqr_ivntr'*_b[iv_ntr]*1000,1)
eststo HH: reg shc_stud_tot_0213 iv_ntr totagenc_percollstd $controls if year==2013 & balanced==1 , cluster(citycode)
				estadd ysumm
				estadd local iqreffect = round(`iqr_ivntr'*_b[iv_ntr]*1000,1)
				
				* Labels are set after estimation but before esttab, which is
				* where the label option reads them.
				label var ivagenc_percap "\$PNTR_{c}\$ X \# New Agencies (per student flows)"
				label var totagenc_percap "\# New Agencies (per student flows), Pre-2002"
				label var ivagenc_percollstd "\$PNTR_{c}\$ X \# New Agencies (per 10,000 college students)"
				label var totagenc_percollstd "\# New Agencies (per 10,000 college students), Pre-2002"
	
* Mark all four stored models as including the controls.
estadd local ctrls "x": G H GG HH
			
					* Only model GG is exported to the table file.
					esttab GG using "$path/Tables/revisions2022/table8_agencies_12may2022collstd.tex", replace b(3) se(3) /// 
						keep(iv_ntr totagenc_percollstd ivagenc_percollstd) star(* 0.10 ** 0.05 *** 0.01) stats(N ctrls, label("Obs." "Controls") fmt(%12.0fc %12.0fc)) ///
						mtitles("\shortstack{Intermediary Study Abroad Agencies}") ///
						label substitute(\_ _) booktabs nonotes nonumber
	
		
			
	**************** APPENDIX ******************
	
***** Annual variation in Exports*******	
	* Post-period indicator (years after 2001) and its interactions with the
	* instrument, the employment-weighted gap, XD_eastasia and each baseline
	* control (the interactions are named post followed by the control name).
	gen postntr = (year > 2001)

	gen postgap      = postntr*iv_ntr
	gen postgap_empl = postntr*ntrgap_empweighted90
	gen postwid      = postntr*XD_eastasia

	foreach ctl in contract_cons_exw97 chinatariff00_expw97 inputtariff02_expw97 share_revenue_exw97 {
		gen post`ctl' = postntr*`ctl'
	}

	* Rescale population in place. This changes pop for every later command.
	replace pop = pop/1000 //now in millions

	* Interquartile range of postgap on the 2013 balanced rows.
	summarize postgap if year==2013 & balanced==1, detail
	local iqr_ivntr = r(p75) - r(p25)

	* Control set for the panel difference-in-differences regressions.
 * global controls_exports "num_college num_midschool numstd_college numstd_midschool  ln_chinatariff_expw97 ln_inputtariff_expw97 postcontract_cons_exw97 postshare_revenue_exw97 " 
	global controls_exports "pop ln_chinatariff_expw97 postinputtariff02_expw97 postcontract_cons_exw97 postshare_revenue_exw97 " 

	* Labels shown in the exported tables.
	label variable postgap                  "Post*NTRGAP"
	label variable postcontract_cons_exw97  "Post*Contract"
	label variable postshare_revenue_exw97  "Post*Export Lic"
	label variable ln_oldpop                "Log Population"
	label variable pop                      "Population (millions)"
	label variable ln_chinatariff_expw97    "Annual Import Tariffs"
	label variable ln_inputtariff_expw97    "Annual Input Tariffs"
	label variable postinputtariff02_expw97 "Post*Input Tariffs"

	***** Export Regressions using the Pierce and Schott Diff-in-diff (since we have older data for exports)

eststo clear

* All years. The USA regression (A) defines the estimation sample d, which
* the Europe (B) and non-USA (C) regressions reuse. Every model stores the
* dependent-variable summary and the interquartile effect: the postgap slope
* times the interquartile range, times 100, rounded to the nearest integer.
eststo A:	reghdfe ln_dvalue_usa postgap $controls_exports if sample==1, absorb(citycode year) cluster(citycode)
gen d = e(sample)
estadd ysumm
estadd local iqreffect = round(`iqr_ivntr'*_b[postgap]*100,1)

foreach spec in "B europe" "C nonusa" {
	local tag  : word 1 of `spec'
	local dest : word 2 of `spec'
	eststo `tag':	reghdfe ln_dvalue_`dest' postgap $controls_exports if d==1, absorb(citycode year) cluster(citycode)
	estadd ysumm
	estadd local iqreffect = round(`iqr_ivntr'*_b[postgap]*100,1)
}

* Early window (year below 2007). The USA regression (D) defines the sample
* dd, reused by Europe (E) and non-USA (F).
eststo D:	reghdfe ln_dvalue_usa postgap $controls_exports if year<2007 & sample==1, absorb(citycode year) cluster(citycode)
gen dd = e(sample)
estadd ysumm
estadd local iqreffect = round(`iqr_ivntr'*_b[postgap]*100,1)

foreach spec in "E europe" "F nonusa" {
	local tag  : word 1 of `spec'
	local dest : word 2 of `spec'
	eststo `tag':	reghdfe ln_dvalue_`dest' postgap $controls_exports if year<2007 & dd==1, absorb(citycode year) cluster(citycode)
	estadd ysumm
	estadd local iqreffect = round(`iqr_ivntr'*_b[postgap]*100,1)
}
	**If we use all years, then it is significant for all exports (slightly larger coefficient for the US -- but maybe this goes with our investment story -- over time chinese exporters enter everywhere)
	**It is also significant for the US if we use pre-2006, so consistent with the Handley and Limao paper (but then postntr is just 2002-2005		

		* NOTE(review): the column titles say Pre 2008 while the early-window
		* models are estimated on year below 2007; confirm which is intended.
		esttab D E F A B C using "$path/Tables/revisions2022/exports_diffdiff.tex", replace b(3) se(3) /// 
		keep(postgap $controls_exports) star(* 0.10 ** 0.05 *** 0.01) stats(iqreffect ymean N r2, label("\textit{Interquartile Effect:}\\ \textit{$\%$ Change Exports}" "Mean Dep Var." "Obs." "R2") fmt(%12.0fc %12.1fc %12.0fc %12.3fc)) ///
			mtitles("USA-Pre 2008" "EUR-Pre 2008" "Non-USA-Pre 2008" "USA-All" "EUR-All" "Non-USA-All" ) ///
			substitute(\_ _) label  nonotes
			
 


	***** New Firms and Investment Regressions, PS Specification**

	eststo clear
	*reghdfe ln_fnumber postgap $controls_exports if sample==1, absorb(citycode year) cluster(citycode)
	*reghdfe ln_fnumber_manuf postgap $controls_exports if sample==1, absorb(citycode year) cluster(citycode)
	*eststo C:	reghdfe linvst_longrun postgap $controls_exports if sample==1 & year <2008, absorb(citycode year) cluster(citycode)

	* Models A to D share one specification and differ only in the outcome;
	* each is tagged as including controls. Each pair below is the stored
	* name followed by the dependent variable.
	foreach spec in "A nfirms_rate_manuf" "B nfirms_rate_asip" "C linvst_tot_rate" "D linvst_equity_rate" {
		local tag : word 1 of `spec'
		local dep : word 2 of `spec'
		eststo `tag':	reghdfe `dep' postgap $controls_exports if sample==1 , absorb(citycode year) cluster(citycode)
		estadd local controls "Yes" , replace
	}

	* Model E is restricted to the estimation sample of model D, which is the
	* active estimate when the loop ends.
	gen ddd = e(sample)
	eststo E:	reghdfe linvst_lr_rate postgap $controls_exports if sample==1 & ddd==1, absorb(citycode year) cluster(citycode)
	estadd local controls "Yes" , replace

			esttab B A C D E using "$path/Tables/revisions2022/nfirms_diffdiff.tex", replace b(3) se(3) /// 
		keep(postgap) star(* 0.10 ** 0.05 *** 0.01) stats(controls N r2, label("Controls" "Obs." "R2") fmt( %12.0fc %12.0fc %12.3fc)) ///
			mtitles("New Firms-ASIP" "New Firms-Census" "Tot Investment (rate)" "Capital Apprec. (rate)" "Fixed Investment (rate)") ///
			substitute(\_ _) label  nonotes		

			
			
			
			
			    
		 
***EVENT STUDY DYNAMICS in Main Outcome

		* Rescale total students in place and express them relative to pop
		* (pop was itself divided by 1000 earlier in this file).
		replace stud_tot = stud_tot/1000
		gen sh_stud_tot = stud_tot/pop

		* One interaction of iv_ntr with each year dummy from 2002 to 2013,
		* collected in a local so the regression below lists them all.
		local ntr_by_year
		forvalues yr = 2002/2013 {
			gen iv_ntrXyear`yr' = iv_ntr*(year==`yr')
			local ntr_by_year `ntr_by_year' iv_ntrXyear`yr'
		}

		* Panel regression with year dummies, city effects absorbed and
		* standard errors clustered by city.
		areg sh_stud_tot `ntr_by_year' i.year if balanced==1, cluster(citycode) absorb(citycode)


		* Sub-period changes measured from the 2013 row, each divided by pop
		* 11 positions back: rows 6 vs 11 back (0207), 3 vs 5 back (0810) and
		* current vs 2 back (1113).
		* NOTE(review): the three windows do not add up to the full change
		* over rows 11 back to current (the steps between 6 and 5 back and
		* between 3 and 2 back are in no window); confirm this is intended.
		sort citycode year
		bysort citycode (year): gen shc_stud_tot_0207 = (stud_tot[_n-6]-stud_tot[_n-11])/pop[_n-11]  if year==2013
		bysort citycode (year): gen shc_stud_tot_0810 = (stud_tot[_n-3]-stud_tot[_n-5])/pop[_n-11] if year==2013
		bysort citycode (year): gen shc_stud_tot_1113 = (stud_tot-stud_tot[_n-2])/pop[_n-11] if year==2013

		* Same cross-sectional specification for each window, restricted to
		* cities with a non-missing 2002-13 change; stored as pd1, pd2, pd3.
		local m = 0
		foreach win in 0207 0810 1113 {
			local ++m
			eststo pd`m': reg shc_stud_tot_`win' iv_ntr $controls if year==2013 & balanced==1 & shc_stud_tot_0213!=., cluster(citycode)
			estadd ysumm
		}

		esttab pd1 pd2 pd3 using "$path/Tables/revisions2022/dynamics_2nov2022.tex", replace b(3) se(3) /// 
					keep(iv_ntr $controls) star(* 0.10 ** 0.05 *** 0.01) stats(ymean N r2, label("Mean Dep Var." "Obs." "R2") fmt(%12.3fc %12.0fc %12.3fc)) ///
					mtitles("2002-07" "2008-10" "2011-13")	substitute(\_ _) label booktabs nonotes

